Viz 1

Vergleich Schweden Deutschland - Inzidenzen oder Positiv-Tests oder relative Fallzahlen oder alles (timeseries) Inzidenz- und Testdaten

Obtaining the data

To obtain the COVID-19 policy data, we read the data from the GitHub repository of the Coronavirus Government Response Tracker project.

# read the OxCGRT policy data directly from the project's GitHub repository
# (columns include country, date and the C/E/H policy indicators used below)
policies <- read.csv('https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv')

After reading the data, we take a glimpse at it.

# take a glimpse of the policy data: row/column counts and column types
# (output below shows 87,715 rows and 47 columns)
glimpse(policies)
## Rows: 87,715
## Columns: 47
## $ CountryName                           <chr> "Aruba", "Aruba", "Aruba", "Aru…
## $ CountryCode                           <chr> "ABW", "ABW", "ABW", "ABW", "AB…
## $ RegionName                            <chr> "", "", "", "", "", "", "", "",…
## $ RegionCode                            <chr> "", "", "", "", "", "", "", "",…
## $ Jurisdiction                          <chr> "NAT_TOTAL", "NAT_TOTAL", "NAT_…
## $ Date                                  <int> 20200101, 20200102, 20200103, 2…
## $ C1_School.closing                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ C1_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ C2_Workplace.closing                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ C2_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ C3_Cancel.public.events               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ C3_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ C4_Restrictions.on.gatherings         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ C4_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ C5_Close.public.transport             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ C5_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ C6_Stay.at.home.requirements          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ C6_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ C7_Restrictions.on.internal.movement  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ C7_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ C8_International.travel.controls      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ E1_Income.support                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ E1_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ E2_Debt.contract.relief               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ E3_Fiscal.measures                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ E4_International.support              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H1_Public.information.campaigns       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H1_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ H2_Testing.policy                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H3_Contact.tracing                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H4_Emergency.investment.in.healthcare <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H5_Investment.in.vaccines             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H6_Facial.Coverings                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H6_Flag                               <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ M1_Wildcard                           <lgl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ConfirmedCases                        <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ConfirmedDeaths                       <int> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ StringencyIndex                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ StringencyIndexForDisplay             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ StringencyLegacyIndex                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ StringencyLegacyIndexForDisplay       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ GovernmentResponseIndex               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ GovernmentResponseIndexForDisplay     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ContainmentHealthIndex                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ContainmentHealthIndexForDisplay      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ EconomicSupportIndex                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ EconomicSupportIndexForDisplay        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…

For the meanings of specific policies, take a look at the Codebook of the project.

The data set is filtered for German and Swedish policies, and the relevant policy columns are selected.

# a function for pre-filtering the global `policies` data frame

filtering <- function(country_name, policy_filter) {
  # Restrict the OxCGRT `policies` table to one country and a given set of
  # columns, parse the integer dates (e.g. 20200101), drop incomplete rows
  # and sort chronologically.
  #
  # country_name:  country to keep (matched against CountryName)
  # policy_filter: character vector of column names to select
  # returns:       a data frame ordered by Date
  policies %>%
    filter(CountryName == country_name) %>% # keep the requested country only
    # all_of() is required for external name vectors (tidyselect >= 1.1.0
    # deprecates passing the bare vector)
    select(all_of(policy_filter)) %>%
    mutate(Date = as.Date(as.character(Date),
                          format = "%Y%m%d")) %>% # 20200101 -> Date
    na.omit() %>% # remove rows with NA entries
    arrange(Date) # order rows by date
}
# a function to build start and stop labels for the equivalent policy
build_start_stop_labels <- function(policies, policy, label){
  # Collapse a daily policy time series into contiguous episodes: one row per
  # run of identical policy values, with the episode's first and last date.
  #
  # policies: data frame with a 'Date' column and the policy column, assumed
  #           to be sorted by Date (the `filtering()` helper guarantees this)
  # policy:   name (string) of the policy column to segment
  # label:    group label (e.g. country name) repeated on every row
  # returns:  data.frame(event, start, end, group)
  selected_policy <- policies[, c('Date', policy)]
  values <- selected_policy[[policy]]

  # rle() finds maximal runs of equal values; run boundaries are exactly the
  # start/stop positions the original hand-rolled loop computed, and this
  # also handles the empty-input case (the old 1:length() loop did not).
  runs <- rle(values)
  stop <- cumsum(runs$lengths)            # last row index of each run
  start <- stop - runs$lengths + 1        # first row index of each run

  # retrieve start and stop dates from the data set
  start_date <- selected_policy[start, 'Date']
  stop_date <- selected_policy[stop, 'Date']
  # policy value in effect during each episode
  events <- values[start]

  # return dataframe with start/stop dates for the policies
  return(data.frame(event = events, start = start_date, end = stop_date,
                    group = label))
}
# columns needed for the timeline: country code, date and the three
# containment policies of interest
policy_filter <- c('CountryCode',
                   'Date',
                   'C8_International.travel.controls',
                   'C6_Stay.at.home.requirements',
                   'C4_Restrictions.on.gatherings')

# pre-filtered, date-parsed policy data for both countries
policies_ger <- filtering('Germany', policy_filter)
policies_se <- filtering('Sweden', policy_filter)

# one start/stop episode table per country for the gathering restrictions
# (C4), stacked into a single timeline data set: Germany first, then Sweden
timeline_data <- rbind(
  build_start_stop_labels(policies_ger,
                          'C4_Restrictions.on.gatherings',
                          'Germany'),
  build_start_stop_labels(policies_se,
                          'C4_Restrictions.on.gatherings',
                          'Sweden')
)


# load vistime for the timeline plot
library(vistime)
# load magrittr explicitly: %<>% is used below but magrittr was previously
# only loaded much later in the script
library(magrittr)

# determine range of event levels (min and max restriction level)
event_range <- timeline_data %>% select(event) %>% range()

# reversed heat palette, padded by 5 so the lightest shades are skipped
color_map <- rev(heat.colors(event_range[2] + 5))

# map each event level to a colour (offset by 4 into the padded palette)
# and turn the numeric level into a label string for the legend
timeline_data %<>% mutate(color = color_map[as.integer(event) + 4]) %>%
                   mutate(event = as.character(event))

# plot timeline with gg_vistime
# NOTE(review): this version of time_line_plot is overwritten by the
# ggplot2 geom_segment() version further below; it is kept here as an
# intermediate exploration step and is never displayed.
time_line_plot <- timeline_data %>% gg_vistime() +
                  scale_color_manual(values = c('grey',
                                                'green',
                                                'blue',
                                                'red',
                                                'yellow',
                                                'black'))

# viridis palette, reversed so that darker colours mean stricter restrictions
library(viridis)
color_map <- rev(viridis(event_range[2]+1))

# timeline as horizontal segments: one row (y) per country, one segment per
# policy episode, coloured by restriction level
# NOTE(review): the five hard-coded labels assume exactly five distinct
# event levels (OxCGRT C4 is coded 0-4) — confirm color_map has matching
# length for the countries/timeframe actually plotted
time_line_plot <- ggplot(timeline_data, aes(x=start, xend=end, y=group, yend=group, color=event)) +
  geom_segment(size=10) +
  scale_color_manual(labels=c('no restrictions', 'above 1000 people','101-1000 people', '11-100 people', '10 people or less'), values = color_map) +
  labs(x = 'Date', y = 'Country',
       colour="Restrictions on\n gatherings")


time_line_plot

# retrieve covid data from covid.ourworldindata.org git repository
# NOTE(review): this chunk is duplicated below (where the raw download is
# additionally kept in covid_data_df); the later chunk overwrites covid_data,
# so this one is redundant and could be dropped.
covid_data <- read.csv('https://covid.ourworldindata.org/data/owid-covid-data.csv')

# filter for datasets of interest
covid_data %<>% filter(location=='Germany' | location=='Sweden') %>% # keep Germany and Sweden only
                select(location, date,
                       new_cases_smoothed_per_million) %>% # select columns of interest
                na.omit() %>% # remove rows with na entries              
                mutate(date=as.Date(as.character(date),
                                    format = "%Y-%m-%d")) # change date format
# retrieve Swedish and German data from the dataframe and plot
library(magrittr)
# retrieve covid data from covid.ourworldindata.org git repository
covid_data_df <- read.csv('https://covid.ourworldindata.org/data/owid-covid-data.csv')

# keep the raw download untouched and work on a copy
covid_data <- covid_data_df

# keep the two countries, the smoothed per-million case numbers and valid
# rows only; parse the ISO date strings into Date objects
covid_data %<>%
  filter(location == 'Sweden' | location == 'Germany') %>%
  select(location, date, new_cases_smoothed_per_million) %>%
  na.omit() %>%
  mutate(date = as.Date(as.character(date), format = "%Y-%m-%d"))

# fixed colour assignment per country, reused by later plots
COLORS <- c(Germany = "steelblue", Sweden = "darkred")

# line plot of smoothed new cases per million, one line per country
lineplot <- ggplot(covid_data,
                   aes(x = date,
                       y = new_cases_smoothed_per_million,
                       group = location,
                       color = location)) +
  geom_line() +
  scale_color_manual(values = COLORS) +
  labs(x = '',
       y = 'Smoothed New Cases\n per\n Million',
       colour = "Countries")

lineplot

# stack the case-number plot above the policy timeline, axes aligned
library(cowplot)
plot_grid(lineplot, time_line_plot, ncol = 1, align = 'v')

Zeitreihen in Facets, Massnahmen

Viz 2

Since the crisis response seems to differ and we got the impression that Sweden is used as a bad example in German media, we wanted to see whether the reporting in both countries about the respective other country differed as well. The first aspect we found interesting was the number of articles over time.

# Build a per-country daily time series of article counts: counts are scaled
# to percent of each country's maximum and additionally smoothed with a
# 7-point Gaussian moving average, then reshaped so that the original and
# smoothed series sit side by side in columns 'original' and 'filter'.
sentiment_time_series <-
  read_rds('./../data/documents_with_sentiments.rds') %>%
  # normalize publish_date to a Date object
  mutate(publish_date = lubridate::as_date(as.Date(publish_date))) %>%
  # articles per country and day
  group_by(country, publish_date) %>%
  summarise(`number of articles` = n()) %>%
  ungroup() %>%
  # scale each country's counts to percent of its own maximum
  group_by(country) %>%
  group_split() %>%
  map(~mutate(., across(where(is.numeric), ~ 100 * . / max(.)))) %>% 
  # add a Gaussian-weighted 7-day moving average as 'filter_<col>'
  # (stats::filter is a linear convolution; weights are a normalized
  # discretized normal density over [-2, 2])
  map_dfr( ~ mutate(., across(
    where(is.numeric),
    ~ stats::filter(., filter = dnorm(seq(-2, 2, length.out = 7)) /
                      sum(dnorm(
                        seq(-2, 2, length.out = 7)
                      ))),
    .names = 'filter_{.col}'
  ))) %>%
  # prefix the raw column so both series share the '<quality>_<indicator>'
  # naming scheme used by the pivot below
  rename('original_number of articles' = 'number of articles') %>%
  # long format: split names into quality (original/filter) and indicator
  pivot_longer(
    cols = where(is.numeric),
    values_to = 'value',
    names_to = c('quality', 'indicator'),
    names_sep = '_'
  ) %>%
  # wide again: one column per quality ('original', 'filter')
  pivot_wider(names_from = quality,
              values_from = value)
  


# rescale case numbers per country to percent of that country's maximum so
# they are comparable with the scaled article counts
covid_data %<>%
  group_by(location) %>%
  group_split() %>%
  map_dfr(function(country_df) {
    mutate(country_df,
           cases = 100 * new_cases_smoothed_per_million /
             max(new_cases_smoothed_per_million))
  })


# Combined time series, faceted by country: daily article counts (points),
# their Gaussian 7-day smoothing (dark blue line) and the rescaled corona
# case numbers of the respective other country (coloured line).
article_timeseries_viz <- 
  sentiment_time_series %>% 
  ggplot(aes(x = publish_date, y = original)) +
  geom_point(color = scales::muted('blue'), alpha = .25) +      # daily counts
  geom_line(aes(y = filter), color = scales::muted('blue')) +   # smoothed counts
  facet_grid(rows = vars(country), scales = 'fixed') +
  geom_line(data = covid_data, aes(y = cases, x = date, color = location)) +
  scale_color_manual(values = COLORS) +
  theme(legend.position = 'bottom') +
  # fixed axis-label typo: 'pulication' -> 'publication'
  labs(x = 'date of publication', 
       y = 'number of articles in % of maximum',
       color = 'corona case numbers') 
  
plotly::ggplotly(article_timeseries_viz) ### or just plain ggplot?

The total number of articles per day concerning the respective country is displayed as points, and the seven-day moving Gaussian average of this number is depicted by the dark blue line. The coloured lines depict the smoothed new case numbers per million inhabitants. All quantities are scaled so that the respective maximum in the timeframe is set to 100% to render them comparable, since the absolute numbers of articles as well as the case numbers differ widely.

Comparing these time courses with the infection rates, one can recognize a similarity between the number of German articles mentioning Sweden and Corona and the number of cases in Sweden. Since the number of articles seems to be loosely coupled to the number of cases in the respective other country, we will look at the content of these reports.

Viz 3

Let’s start by using the rather patchy sentiments we were able to gather to generate a few maps.

library(eurostat)
library(sf)

# Swedish NUTS-3 regions (län)
sf_sweden <- get_eurostat_geospatial(resolution = '10', nuts = 3) %>%
  filter(CNTR_CODE %in% c('SE'))

# German NUTS-1 regions (federal states)
sf_germany <- get_eurostat_geospatial(resolution = '10', nuts = 1) %>%
  filter(CNTR_CODE %in% c('DE'))

# combine both geometries and lower-case the latin names so they can be
# joined against the lower-cased province names later
sf_data <- bind_rows(sf_sweden, sf_germany) %>%
  mutate(NAME_LATN = str_to_lower(NAME_LATN))


se_countrycodes <- read_tsv('https://www.iso.org/obp/ui/#iso:code:3166:SE')


# mean article sentiment per publication state, computed separately per
# country and bound back into one data frame
regional_sents <- read_rds('./../data/documents_with_sentiments.rds') %>% 
  group_by(country) %>% 
  group_split() %>% 
  map_dfr(function(country_df) {
    country_df %>%
      group_by(pub_state, country) %>%
      summarise(sentiment = mean(m_sentiment))
  })

Now we need to cross-reference the Mediacloud countrycodes (ISO 3166:SE, ISO 3166:DE) with the eurostat ones:

# Hand-maintained lookup table mapping province names (as they appear in the
# eurostat NAME_LATN column) to their ISO 3166-2 codes (SE-* and DE-*), read
# from an inline semicolon-separated string.
iso_3166 <- read_delim(
  'Provinz;Code
  Blekinge län;SE-K
  Dalarnas län;SE-W
  Gotlands län;SE-I
  Gävleborgs län;SE-X
  Hallands län;SE-N
  Jämtlands län;SE-Z
  Jönköpings län;SE-F
  Kalmar län;SE-H
  Kronobergs län;SE-G
  Norrbottens län;SE-BD
  Skåne län;SE-M
  Stockholms län;SE-AB
  Södermanlands län;SE-D
  Uppsala län;SE-C
  Värmlands län;SE-S
  Västerbottens län;SE-AC
  Västernorrlands län;SE-Y
  Västmanlands län;SE-U
  Västra Götalands län;SE-O
  Örebro län;SE-T
  Östergötlands län;SE-E
  Baden-Württemberg;DE-BW
  Bayern;DE-BY
  Berlin;DE-BE
  Brandenburg;DE-BB
  Bremen;DE-HB
  Hamburg;DE-HH
  Hessen;DE-HE
  Mecklenburg-Vorpommern;DE-MV
  Niedersachsen;DE-NI
  Nordrhein-Westfalen;DE-NW
  Rheinland-Pfalz;DE-RP
  Saarland;DE-SL
  Sachsen;DE-SN
  Sachsen-Anhalt;DE-ST
  Schleswig-Holstein;DE-SH
  Thüringen;DE-TH',
  delim = ';'
) %>% 
  mutate(across(everything(), ~str_trim(.)))%>% # strip the indentation whitespace
  mutate(Provinz = str_to_lower(Provinz)) # lower-case to match NAME_LATN


# attach province names to the sentiment table via the ISO code;
# right_join keeps every province, leaving NA sentiment where no data exists
regional_sents %<>%
  right_join(iso_3166, by = c(pub_state = 'Code'))

This dataset can now be used to depict the overall regional sentiment in both countries:

# Choropleth of regional mean sentiment.
#
# data:   sf data frame with `geometry` and `sentiment` columns
# legend: FALSE suppresses the legend; TRUE uses a void theme with a
#         right-hand legend (handy for extracting the legend separately)
# returns a ggplot object in Mercator projection
reg_sent_plot <- function(data, legend){
  # NOTE: the original passed `expand = F` to geom_sf(), which is not a
  # geom_sf() parameter — ggplot2 warned and ignored it, so it was removed.
  p <- ggplot(data) +
    geom_sf(aes(geometry = geometry, fill = sentiment), color = 'grey') +
    # diverging palette: red = negative, blue = positive sentiment,
    # light grey where no sentiment data is available
    scale_fill_gradient2(low = scales::muted('red'), 
                         mid = 'white',
                         high = scales::muted('blue'),
                         na.value = 'lightgrey',
                         limits = c(-5, 5)) +
    cowplot::theme_map()
  if (!legend) {
    p <- p + theme(legend.position = 'none')
  } else {
    p <- p + theme_void() + theme(legend.position = 'right')
  }
  p +
    coord_sf(crs = st_crs("+proj=merc"))
}

# one sentiment map per country, legends suppressed
# (T/F replaced by TRUE/FALSE — T and F are reassignable and unsafe)
plots <- sf_data %>% 
  left_join(regional_sents[, c('Provinz', 'sentiment')],
            by = c('NAME_LATN' = 'Provinz')) %>% 
  group_by(CNTR_CODE) %>%
  group_split() %>%
  purrr::map(~reg_sent_plot(., legend = FALSE))

# shared legend, extracted from a dummy plot with a constant sentiment of 0
legend <- cowplot::get_legend(reg_sent_plot(mutate(sf_data, sentiment = 0),
                                            legend = TRUE))

# maps side by side with the shared legend in a narrow right column
cowplot::plot_grid(
  cowplot::plot_grid(plotlist = plots),
  legend,
  rel_widths = c(4, .4)
)

And finally, let’s look at some wordclouds:

# pre-aggregated word-frequency/sentiment data prepared elsewhere; expected
# columns (used below): country, translation, n, sentiment
wc_data <- read_rds('./../data/wordcloud_data.rds')


# Render a word cloud shaped like a country's outline.
#
# wc_data: data frame with `translation` (word), `n` (count), `sentiment`
# country: eurostat country code ('DE' or 'SE') used for the silhouette
# returns a ggplot word cloud masked by the country shape
wordcloud <- function(wc_data, country){
  require(ggwordcloud)
  # draw the country silhouette in black on white
  p <- get_eurostat_geospatial(resolution = '10',
                               nuts = 0) %>%
    filter(CNTR_CODE %in% c(country)) %>% 
    ggplot() +
    geom_sf(fill = 'black', color = 'black') +
    coord_sf(crs = st_crs("+proj=moll")) +
    theme_void()
  # write the silhouette to a temporary PNG and read it back as the mask;
  # the original used 'temp.png' in the working directory and only removed
  # it on success — tempfile() + on.exit() guarantees cleanup on error too
  mask_file <- tempfile(fileext = '.png')
  on.exit(unlink(mask_file), add = TRUE)
  ggsave(mask_file, plot = p)
  img <- png::readPNG(mask_file)
  # crop fully-white columns and rows so the mask hugs the silhouette
  dummy <- colSums(img[,,1]) != max(colSums(img[,,1]))
  img <- img[,dummy,]
  dummy <- rowSums(img[,,1]) != max(rowSums(img[,,1]))
  img <- img[dummy,,]
  
  # word cloud inside the mask; colour encodes sentiment (red/grey/blue)
  ggplot(wc_data, aes(label = translation, size = n, color = sentiment)) + 
    geom_text_wordcloud(mask = img, rm_outside = TRUE) +
    scale_color_gradient2(low = scales::muted('red'), 
                          mid = 'grey',
                          high = scales::muted('blue'),
                          na.value = 'darkgrey',
                          limits = c(-5, 5))
}
 
# helper: the 200 most frequent words reported about one country
top_words <- function(data, which_country) {
  data %>%
    filter(country == which_country) %>%
    arrange(desc(n)) %>%
    head(200)
}

# word cloud of German reporting, shaped like Germany
ger_wc <- wc_data %>%
  top_words('Germany') %>%
  wordcloud(country = 'DE')

# word cloud of Swedish reporting, shaped like Sweden
swe_wc <- wc_data %>%
  top_words('Sweden') %>%
  wordcloud(country = 'SE')

cowplot::plot_grid(ger_wc, swe_wc)